# coding: utf-8


def read_vocab(vocab_dir):
    """Read a vocabulary file and build the word-to-index mapping.

    The file is read as UTF-8 text, one entry per line. Leading and
    trailing whitespace (including the line terminator) is stripped from
    every line, and a UTF-8 byte-order mark at the start of the file, if
    present, is discarded so it cannot become part of the first word.

    Args:
        vocab_dir: Path of the vocabulary file to open.

    Returns:
        A tuple ``(words, word_to_id)`` where ``words`` is the list of
        stripped lines in file order and ``word_to_id`` maps each word to
        its 0-based line index. If a word occurs more than once, the index
        of its last occurrence is kept.
    """
    # io.open accepts ``encoding`` on both Python 2 and Python 3 and always
    # yields text (unicode) strings, so the result no longer depends on the
    # platform's default locale encoding.
    import io

    # "utf-8-sig" decodes plain UTF-8 and additionally skips a leading BOM.
    with io.open(vocab_dir, encoding="utf-8-sig") as fp:
        words = [line.strip() for line in fp]
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id


def read_category():
    """Return the fixed category labels and their label-to-index mapping.

    The labels are hard-coded; a label's position in the list is its id.

    Returns:
        A tuple ``(categories, cat_to_id)`` where ``categories`` is the list
        of label strings and ``cat_to_id`` maps each label to its 0-based
        position in that list.
    """
    categories = [
        '安全自动装置',
        '计量仪表类',
        '避雷器',
        '开关柜',
        '电容器',
        '通信类',
        '消弧线圈',
        '变压器',
        '其他设备',
        '端子箱、电源箱、动力箱',
        '中性点成套装置',
        '组合电器',
        '断路器',
        '电源类',
        '互感器',
        '隔离开关',
        '电抗器',
    ]

    # Order matters: the index assigned here is the class id.
    cat_to_id = {label: index for index, label in enumerate(categories)}
    return categories, cat_to_id
